import numpy as ny
import pandas as ps
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import time


tic=time.time()


from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


cred=ps.read_csv("/content/drive/MyDrive/Project - ITE2013 - Big Data - Credit Card Fraud Detection/dataset/creditcard.csv")

cred.head()


cred = cred.sample(frac=1)
cred


#We don't need Time and Class feature as they are unused - saw in other algorithm models

features = cred.drop(["Time","Class"],axis=1)
features


labels = ps.DataFrame(cred[["Class"]])
labels


features_array = features.values
features_array

array([[-5.90782921e+00,  4.49503625e+00, -3.68287879e+00, ...,
        -1.39371378e+00, -2.22022345e-01,  1.77000000e+00],
       [-2.78386549e+00,  1.59682358e+00, -2.08484399e+00, ...,
        -1.33309976e+00,  4.28633994e-01,  1.56000000e+02],
       [-1.15042772e+00, -7.91315138e-01,  5.65063609e-01, ...,
        -7.69154290e-02,  4.73183520e-02,  9.07000000e+01],
       ...,
       [-2.03265075e+00,  4.09650644e-01,  7.36766099e-01, ...,
         9.53257400e-03, -3.69568310e-02,  2.62200000e+01],
       [ 1.64381529e+00, -2.15224043e+00,  6.13898230e-01, ...,
         2.76443020e-02, -1.28499500e-03,  2.00000000e+02],
       [-1.36132758e+00,  1.33293714e+00,  4.19559452e-01, ...,
        -3.52891758e-01, -9.52247000e-03,  7.60000000e-01]])


labels_array = labels.values
labels_array

array([[0],
       [1],
       [0],
       ...,
       [0],
       [0],
       [0]])


train_feat, test_feat, train_lab, test_lab = train_test_split(features_array,labels_array, train_size=0.90)


train_feat = normalize(train_feat)
test_feat = normalize(test_feat)


#KNN (K-NEAREST NEIGHBOUR) CLASSIFICATION

KNN=KNeighborsClassifier(n_neighbors=5,algorithm="kd_tree",n_jobs=-1)
KNN.fit(train_feat,train_lab.ravel())
KNN_predicted_test_lab=KNN.predict(test_feat)


trueNeg,falsePos,falseNeg,truePos=confusion_matrix(test_lab,KNN_predicted_test_lab).ravel()


accuracy = accuracy_score(test_lab,KNN_predicted_test_lab)
precison = precision_score(test_lab,KNN_predicted_test_lab)
recall = recall_score(test_lab,KNN_predicted_test_lab)
f1_score = f1_score(test_lab,KNN_predicted_test_lab)


print("Confusion Matrix of KNN")
print("True Negative = ",trueNeg," || False Positive = ",falsePos)
print("False Negative = ",falseNeg," || True Positive = ",truePos)
print(" ")
print("SCORES VIA METRICS --")
print("Accuracy ==>",accuracy)
print("Precison ==>",precison)
print("Recall ==>",recall)
print("F1_Score ==>",f1_score)

Confusion Matrix of KNN
True Negative =  28423  || False Positive =  4
False Negative =  19  || True Positive =  35
 
SCORES VIA METRICS --
Accuracy ==> 0.9991924440855307
Precison ==> 0.8974358974358975
Recall ==> 0.6481481481481481
F1_Score ==> 0.7526881720430108

	Time	V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20	V21	V22	V23	V24	V25	V26	V27	V28	Amount
0	0.0	-1.359807	-0.072781	2.536347	1.378155	-0.338321	0.462388	0.239599	0.098698	0.363787	0.090794	-0.551600	-0.617801	-0.991390	-0.311169	1.468177	-0.470401	0.207971	0.025791	0.403993	0.251412	-0.018307	0.277838	-0.110474	0.066928	0.128539	-0.189115	0.133558	-0.021053	149.62
1	0.0	1.191857	0.266151	0.166480	0.448154	0.060018	-0.082361	-0.078803	0.085102	-0.255425	-0.166974	1.612727	1.065235	0.489095	-0.143772	0.635558	0.463917	-0.114805	-0.183361	-0.145783	-0.069083	-0.225775	-0.638672	0.101288	-0.339846	0.167170	0.125895	-0.008983	0.014724	2.69
2	1.0	-1.358354	-1.340163	1.773209	0.379780	-0.503198	1.800499	0.791461	0.247676	-1.514654	0.207643	0.624501	0.066084	0.717293	-0.165946	2.345865	-2.890083	1.109969	-0.121359	-2.261857	0.524980	0.247998	0.771679	0.909412	-0.689281	-0.327642	-0.139097	-0.055353	-0.059752	378.66
3	1.0	-0.966272	-0.185226	1.792993	-0.863291	-0.010309	1.247203	0.237609	0.377436	-1.387024	-0.054952	-0.226487	0.178228	0.507757	-0.287924	-0.631418	-1.059647	-0.684093	1.965775	-1.232622	-0.208038	-0.108300	0.005274	-0.190321	-1.175575	0.647376	-0.221929	0.062723	0.061458	123.50
4	2.0	-1.158233	0.877737	1.548718	0.403034	-0.407193	0.095921	0.592941	-0.270533	0.817739	0.753074	-0.822843	0.538196	1.345852	-1.119670	0.175121	-0.451449	-0.237033	-0.038195	0.803487	0.408542	-0.009431	0.798278	-0.137458	0.141267	-0.206010	0.502292	0.219422	0.215153	69.99

	Time	V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20	V21	V22	V23	V24	V25	V26	V27	V28	Amount	Class
178103	123475.0	-5.907829	4.495036	-3.682879	-2.626879	-0.198306	3.447705	-2.558034	3.446840	2.201571	2.332888	-0.730184	1.157248	0.159355	0.692754	0.724211	0.482909	0.141694	-0.297332	-0.606919	0.645331	-0.095417	-0.823894	0.512631	0.608003	0.364096	-0.373512	-1.393714	-0.222022	1.77	0
261925	160243.0	-2.783865	1.596824	-2.084844	2.512986	-1.446749	-0.828496	-0.732262	-0.203329	-0.347046	-2.162061	1.966123	-3.127456	0.506574	-5.926131	0.931091	-2.499307	-3.712752	-1.142133	0.626241	-0.515001	0.203563	0.293268	0.199568	0.146868	0.163602	-0.624085	-1.333100	0.428634	156.00	1
194951	130837.0	-1.150428	-0.791315	0.565064	-2.139782	1.352664	-0.059039	0.260391	0.125684	-1.596782	-0.091853	-0.230739	-0.685829	-0.270623	-0.019917	-1.300188	1.093228	-0.182027	-0.804988	1.560827	0.533215	0.057807	-0.366581	-0.154412	-0.313439	0.914807	-0.196511	-0.076915	0.047318	90.70	0
197961	132257.0	2.086916	-0.842936	-3.973379	-1.698370	2.540840	2.677246	-0.276120	0.497714	-0.974467	0.342742	0.364130	-0.759274	-0.257962	-0.826524	-0.072530	0.534962	1.416166	-1.382954	0.482206	0.206675	0.296118	0.673546	-0.137164	0.675475	0.507282	0.122100	-0.052114	-0.047698	91.40	0
111956	72448.0	-0.795972	-0.337945	1.868059	1.050754	-1.120475	0.087213	-1.124932	0.708030	-1.023988	0.285413	-0.667877	0.441433	0.751321	-0.250259	0.470302	-2.091935	0.891401	1.006323	-0.498712	-0.275143	-0.172551	-0.152472	0.226367	0.365930	-0.484788	-0.311035	0.039018	-0.114734	40.00	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
160326	113237.0	1.624383	-1.005744	0.412692	1.615982	-1.085810	0.945635	-1.166649	0.368163	1.954861	-0.125110	-1.596324	0.606588	0.206027	-0.902252	-0.464850	0.204302	-0.410422	0.368665	-0.506987	0.040387	0.247959	0.817079	0.063272	0.618675	-0.228784	-0.559651	0.092650	0.011391	139.00	0
46718	42899.0	-1.213732	1.688825	-0.755056	-0.912420	3.124243	3.518855	-0.720185	-1.732199	-1.137526	-1.860717	0.036458	-0.395282	-0.530655	-0.915168	0.640247	1.222133	0.340463	1.171805	-0.883744	0.655440	-1.324803	-0.018077	-0.477582	0.836884	0.728877	-0.413261	0.029606	0.135440	1.00	0
59846	49032.0	-2.032651	0.409651	0.736766	-1.862659	1.526845	0.499899	-0.262421	-2.474291	-1.007222	-1.590705	0.276773	1.103023	0.890970	0.441409	-0.650635	1.128877	-1.287067	-0.204933	-1.089240	0.400198	-1.434896	0.022223	0.391344	-1.322870	0.684427	0.550036	0.009533	-0.036957	26.22	0
272256	164988.0	1.643815	-2.152240	0.613898	0.277621	-2.363581	0.597806	-1.773400	0.319186	1.632011	0.397138	-2.235965	0.090129	-0.397398	-1.318790	-1.546596	-1.557987	0.357367	1.408143	-0.528242	-0.273783	-0.194241	0.022026	0.081441	-0.068532	-0.576502	0.730318	0.027644	-0.001285	200.00	0
19645	30431.0	-1.361328	1.332937	0.419559	1.178256	-0.808321	1.358962	-1.425060	1.802502	-0.007368	-0.564927	-0.850215	0.472257	-0.984551	0.798282	-0.578186	-0.007858	0.449377	0.657700	1.406890	-0.445492	0.127248	0.136341	0.079341	-1.183486	-0.833094	-0.389425	-0.352892	-0.009522	0.76	0

	V1	V2	V3	V4	V5	V6	V7	V8	V9	V10	V11	V12	V13	V14	V15	V16	V17	V18	V19	V20	V21	V22	V23	V24	V25	V26	V27	V28	Amount
178103	-5.907829	4.495036	-3.682879	-2.626879	-0.198306	3.447705	-2.558034	3.446840	2.201571	2.332888	-0.730184	1.157248	0.159355	0.692754	0.724211	0.482909	0.141694	-0.297332	-0.606919	0.645331	-0.095417	-0.823894	0.512631	0.608003	0.364096	-0.373512	-1.393714	-0.222022	1.77
261925	-2.783865	1.596824	-2.084844	2.512986	-1.446749	-0.828496	-0.732262	-0.203329	-0.347046	-2.162061	1.966123	-3.127456	0.506574	-5.926131	0.931091	-2.499307	-3.712752	-1.142133	0.626241	-0.515001	0.203563	0.293268	0.199568	0.146868	0.163602	-0.624085	-1.333100	0.428634	156.00
194951	-1.150428	-0.791315	0.565064	-2.139782	1.352664	-0.059039	0.260391	0.125684	-1.596782	-0.091853	-0.230739	-0.685829	-0.270623	-0.019917	-1.300188	1.093228	-0.182027	-0.804988	1.560827	0.533215	0.057807	-0.366581	-0.154412	-0.313439	0.914807	-0.196511	-0.076915	0.047318	90.70
197961	2.086916	-0.842936	-3.973379	-1.698370	2.540840	2.677246	-0.276120	0.497714	-0.974467	0.342742	0.364130	-0.759274	-0.257962	-0.826524	-0.072530	0.534962	1.416166	-1.382954	0.482206	0.206675	0.296118	0.673546	-0.137164	0.675475	0.507282	0.122100	-0.052114	-0.047698	91.40
111956	-0.795972	-0.337945	1.868059	1.050754	-1.120475	0.087213	-1.124932	0.708030	-1.023988	0.285413	-0.667877	0.441433	0.751321	-0.250259	0.470302	-2.091935	0.891401	1.006323	-0.498712	-0.275143	-0.172551	-0.152472	0.226367	0.365930	-0.484788	-0.311035	0.039018	-0.114734	40.00
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
160326	1.624383	-1.005744	0.412692	1.615982	-1.085810	0.945635	-1.166649	0.368163	1.954861	-0.125110	-1.596324	0.606588	0.206027	-0.902252	-0.464850	0.204302	-0.410422	0.368665	-0.506987	0.040387	0.247959	0.817079	0.063272	0.618675	-0.228784	-0.559651	0.092650	0.011391	139.00
46718	-1.213732	1.688825	-0.755056	-0.912420	3.124243	3.518855	-0.720185	-1.732199	-1.137526	-1.860717	0.036458	-0.395282	-0.530655	-0.915168	0.640247	1.222133	0.340463	1.171805	-0.883744	0.655440	-1.324803	-0.018077	-0.477582	0.836884	0.728877	-0.413261	0.029606	0.135440	1.00
59846	-2.032651	0.409651	0.736766	-1.862659	1.526845	0.499899	-0.262421	-2.474291	-1.007222	-1.590705	0.276773	1.103023	0.890970	0.441409	-0.650635	1.128877	-1.287067	-0.204933	-1.089240	0.400198	-1.434896	0.022223	0.391344	-1.322870	0.684427	0.550036	0.009533	-0.036957	26.22
272256	1.643815	-2.152240	0.613898	0.277621	-2.363581	0.597806	-1.773400	0.319186	1.632011	0.397138	-2.235965	0.090129	-0.397398	-1.318790	-1.546596	-1.557987	0.357367	1.408143	-0.528242	-0.273783	-0.194241	0.022026	0.081441	-0.068532	-0.576502	0.730318	0.027644	-0.001285	200.00
19645	-1.361328	1.332937	0.419559	1.178256	-0.808321	1.358962	-1.425060	1.802502	-0.007368	-0.564927	-0.850215	0.472257	-0.984551	0.798282	-0.578186	-0.007858	0.449377	0.657700	1.406890	-0.445492	0.127248	0.136341	0.079341	-1.183486	-0.833094	-0.389425	-0.352892	-0.009522	0.76

	Class
178103	0
261925	1
194951	0
197961	0
111956	0
...	...
160326	0
46718	0
59846	0
272256	0
19645	0

Big Data Analytics Project

Credit Card Fraud Detection

Import the packages¶

Importing dataset¶

Randomize the dataset¶

Data Preparation¶

Splitting the Dataset¶

Normalizing Dataset¶

Creating the Model¶

Confusion matrix for KNN¶

Evaluation Metrics for KNN¶

Results from Training¶

Summary¶